package com.galeno.sparksql

import org.apache.spark.sql.types.{DataTypes, StructField, StructType}
import org.apache.spark.sql.{DataFrame, SparkSession}

/**
 * @Title: C02
 * @Description: Examples of reading CSV and JSON files into Spark DataFrames,
 *               with and without headers and explicit schemas.
 * @author galeno
 * @date 2021/9/4 10:03
 */
object C02 {

  /**
   * Demonstrates several ways of loading files into Spark DataFrames:
   * CSV with a header row, CSV with schema inference, CSV with an
   * explicitly declared schema, and line-delimited JSON.
   */
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder().master("local").appName("C02").getOrCreate()

    // The "header" option tells Spark to treat the file's first line as column names.
    val withHeader: DataFrame = spark.read.option("header", "true").csv("data/battel2.txt")
    withHeader.printSchema()
    withHeader.show()

    // Avoid inferSchema in production: schema inference triggers a separate job
    // just to scan the data and guess column types.
    val inferred: DataFrame = spark.read
      .option("inferSchema", "true")
      .csv("data/battel.txt")
      .toDF("id", "name", "role", "power")
    inferred.printSchema()

    // Header-less CSV: declare the column names and types up front instead.
    val battleSchema: StructType = StructType(Seq(
      StructField("id", DataTypes.IntegerType),
      StructField("name", DataTypes.StringType),
      StructField("role", DataTypes.StringType),
      StructField("energy", DataTypes.DoubleType)
    ))
    val typed = spark.read.schema(battleSchema).csv("data/battel.txt")
    typed.printSchema()

    // JSON source — NOTE(review): spark.read.json expects one JSON object per line.
    val logs: DataFrame = spark.read.json("data/app_log_2021-06-05.log")
    logs.printSchema()
    // Show all rows, untruncated.
    logs.show(Integer.MAX_VALUE, false)

    // Keep the JVM (and the Spark web UI) alive for manual inspection.
    Thread.sleep(Integer.MAX_VALUE)
  }

}
